Load the PM data and project using CA Albers projection.
library(ggplot2)
library(leaflet)
library(proj4)
library(geoR)
library(dplyr)
library(fields)
library(mgcv)
pm25 = read.csv("/Users/mf/Dropbox (University of Southern California)/Courses/PM569/Fall 2019/data/pm25_ca_2018.csv")
pm25.sites = read.csv("/Users/mf/Dropbox (University of Southern California)/Courses/PM569/Fall 2019/data/pm25_sites.csv")
pm_ca<-merge(pm25 %>% filter(date=="11/10/18"), pm25.sites, by='site_id')
proj_albers<-"+proj=aea +lat_1=34.0 +lat_2=40.5 +lon_0=-120.0 +x_0=0 +y_0=-4000000 +ellps=GRS80 +datum=NAD83 +units=km"
newcoords<-project(as.matrix(cbind(pm_ca$longitude, pm_ca$latitude)), proj=proj_albers)
pm_ca$x<-newcoords[,1]
pm_ca$y<-newcoords[,2]
Make a map of the PM2.5 concentrations
pm.pal = colorNumeric(c('darkgreen','goldenrod','brown','brown'),
domain=pm_ca$pm25)
leaflet(pm_ca) %>%
addProviderTiles('CartoDB.Positron') %>%
addCircles(lat=~latitude,lng=~longitude,label=~paste0(pm25, ' ug/m3'), color=~pm.pal(pm25),
opacity=1, fillOpacity=1, radius=500) %>%
addLegend('bottomleft', pal=pm.pal, values=pm_ca$pm25,
title='PM2.5 (ug/m3)', opacity=1)
Based on our previous results we will use the log PM2.5 concentrations and the Albers projected coordinates.
# log pm25 better, set up geodata object
pm_ca$logpm25<-log(pm_ca$pm25)
lpm_geo_alb<-as.geodata(pm_ca,coords.col=c(7,8),data.col=9)
plot(lpm_geo_alb)
summary(lpm_geo_alb)
## Number of data points: 68
##
## Coordinates summary
## x y
## min -354.9670 -599.0421
## max 424.1254 415.1952
##
## Distance summary
## min max
## 3.607555 1187.488641
##
## Data summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.481605 2.314549 2.886934 2.972054 3.385151 5.204556
Create a grid for kriging/interpolation
res=60
xs=seq(min(pm_ca$x),max(pm_ca$x),len=res)
ys=seq(min(pm_ca$y),max(pm_ca$y),len=res)
myGrid=expand.grid(xs,ys)
names(myGrid)=c('x','y')
# make sure it looks correct
plot(myGrid, pch=19, cex=0.5)
Let’s krig using the binned semivariogram fit via WLS
# binned semivariogram with C-H robust
# max dist 200 km, use 12 bins
vario2<-variog(lpm_geo_alb,uvec=seq(0,200,l=12),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
plot(vario2,xlab="Distance (h), km")
# fitting semivariogram by wls
# change model type to gaussian
vfit_wls2=variofit(vario2,ini.cov.pars=c(0.5,100),nugget=0.05,fix.nugget=FALSE,cov.model='spherical',weights='cressie')
## variofit: covariance model used is spherical
## variofit: weights used: cressie
## variofit: minimisation function used: optim
plot(vario2,xlab="Distance (h), km")
lines(vfit_wls2,col="cyan",lwd=1.5)
summary(vfit_wls2)
## $pmethod
## [1] "WLS (weighted least squares)"
##
## $cov.model
## [1] "spherical"
##
## $spatial.component
## sigmasq phi
## 0.6196633 250.6438244
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 0.01921007
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 250.6438
##
## $sum.of.squares
## value
## 15.11108
##
## $estimated.pars
## tausq sigmasq phi
## 0.01921007 0.61966327 250.64382442
##
## $weights
## [1] "cressie"
##
## $call
## variofit(vario = vario2, ini.cov.pars = c(0.5, 100), cov.model = "spherical",
## fix.nugget = FALSE, nugget = 0.05, weights = "cressie")
##
## attr(,"class")
## [1] "summary.variomodel"
# Simple kriging (constant known mean must be specified)
KCsimple<-krige.control(type.krige="sk",obj.m=vfit_wls2,beta=2)
# locations= are the unobserved locations where we want to make predictions
simple_krig<-krige.conv(lpm_geo_alb,locations=myGrid,krige=KCsimple, output=output.control(signal=TRUE))
## krige.conv: model with constant mean
## krige.conv: Kriging performed using global neighbourhood
# plot gridded predictions
image.plot(xs,ys,matrix(simple_krig$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Simple Kriging")
# ordinary kriging, default in krig.control
KCord<-krige.control(type.krige='ok',obj.m=vfit_wls2)
ordinary_krige<-krige.conv(lpm_geo_alb,locations=myGrid,krige=KCord,output=output.control(signal=TRUE))
## krige.conv: model with constant mean
## krige.conv: Kriging performed using global neighbourhood
# plot gridded predictions
image.plot(xs,ys,matrix(ordinary_krige$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Ordinary Kriging")
# plot standard errors
image.plot(xs,ys,matrix(sqrt(ordinary_krige$krige.var),res,res,byrow=FALSE),col=tim.colors(32))
Let’s krig using all of the data (cloud semivariogram)
mlfit_gau=likfit(lpm_geo_alb,ini.cov.pars=c(0.5,100),nugget=0.05, fix.nugget=FALSE,cov.model='gaussian', lik.method='ML')
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(mlfit_gau)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 2.9326
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 0.6781
## (estimated) cor. fct. parameter phi (range parameter) = 129.5
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.1024
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 224.2018
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-49.86" "4" "107.7" "116.6"
##
## non spatial model:
## log.L n.params AIC BIC
## "-87.11" "2" "178.2" "182.7"
##
## Call:
## likfit(geodata = lpm_geo_alb, ini.cov.pars = c(0.5, 100), fix.nugget = FALSE,
## nugget = 0.05, cov.model = "gaussian", lik.method = "ML")
# uni.only=FALSE means it will calculate 2-D profiles, which is not necessary, remove or set to true for faster computation
prof <- proflik(mlfit_gau, geodata = lpm_geo_alb, sill.values = seq(0.4,0.8,l=5), range.values = seq(50, 150, l=5), nugget.values=seq(0.05,0.2,l=5), uni.only = FALSE)
## proflik: computing profile likelihood for the sill
## proflik: computing profile likelihood for the range
## proflik: computing profile likelihood for the nugget
## proflik: computing 2-D profile likelihood for the sill and range parameters
## proflik: computing 2-D profile likelihood for the sill and nugget
## proflik: computing 2-D profile likelihood for the range and nugget
par(mfrow=c(2,3))
plot(prof)
# parameters with highest likelihood
prof$range$est.range[1]
## [1] 129.5351
prof$sill$est.sill[1]
## [1] 0.678105
prof$nugget$est.nugget[1]
## [1] 0.1024372
# Kriging by Maximum likelihood
# Simple kriging (constant known mean must be specified)
KCsimple_ml<-krige.control(type.krige="sk",obj.m=mlfit_gau,beta=2)
# locations= are the unobserved locations where we want to make predictions
simple_krig_ml<-krige.conv(lpm_geo_alb,locations=myGrid,krige=KCsimple_ml)
## krige.conv: model with constant mean
## krige.conv: Kriging performed using global neighbourhood
par(mfrow=c(1,4))
#plot on grid
image.plot(xs,ys,matrix(simple_krig_ml$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Simple Kriging")
#to save kriging predicted vals
myGrid2<-data.frame(myGrid,simple_krig$predict, simple_krig$krige.var)
# Ordinary kriging (constant unknown mean)
KCord_ml<-krige.control(type.krige='ok',obj.m=mlfit_gau)
ordinary_krige_ml<-krige.conv(lpm_geo_alb,locations=myGrid,krige=KCord_ml)
## krige.conv: model with constant mean
## krige.conv: Kriging performed using global neighbourhood
# plot gridded predictions
image.plot(xs,ys,matrix(ordinary_krige_ml$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Ordinary Kriging by ML")
# plot standard errors
image.plot(xs,ys,matrix(sqrt(ordinary_krige_ml$krige.var),res,res,byrow=FALSE),col=tim.colors(32), main="Ordinary Kriging ML Errors")
# doing it in one step using the best profile likelihood estimates
krige_ml2 <- krige.conv(lpm_geo_alb, loc=myGrid,
krige=krige.control(type.krige="ok",cov.pars=c(0.678105, 129.5351), nugget=0.1024372))
## krige.conv: model with constant mean
## krige.conv: Kriging performed using global neighbourhood
image.plot(xs,ys,matrix(krige_ml2$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Ordinary Kriging by ML")
# check trends first
summary(lm(logpm25~x+y,data=pm_ca))
##
## Call:
## lm(formula = logpm25 ~ x + y, data = pm_ca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1269 -0.3841 0.0983 0.4314 1.7051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.1276876 0.1341716 23.311 < 2e-16 ***
## x -0.0034878 0.0010304 -3.385 0.00121 **
## y -0.0006420 0.0007108 -0.903 0.36973
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7216 on 65 degrees of freedom
## Multiple R-squared: 0.3441, Adjusted R-squared: 0.324
## F-statistic: 17.05 on 2 and 65 DF, p-value: 1.113e-06
summary(lm(logpm25~x+I(x^2)+y+I(y^2)+ I(x*y),data=pm_ca))
##
## Call:
## lm(formula = logpm25 ~ x + I(x^2) + y + I(y^2) + I(x * y), data = pm_ca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65265 -0.34489 -0.04261 0.35548 1.32363
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.507e+00 1.361e-01 25.772 < 2e-16 ***
## x -7.441e-03 1.370e-03 -5.433 9.86e-07 ***
## I(x^2) -2.673e-05 6.697e-06 -3.992 0.000176 ***
## y -4.843e-03 9.773e-04 -4.956 5.88e-06 ***
## I(y^2) -1.815e-05 3.215e-06 -5.644 4.40e-07 ***
## I(x * y) -3.775e-05 8.505e-06 -4.438 3.80e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5736 on 62 degrees of freedom
## Multiple R-squared: 0.6047, Adjusted R-squared: 0.5728
## F-statistic: 18.97 on 5 and 62 DF, p-value: 2.14e-11
mlfit_trend_gau=likfit(lpm_geo_alb,ini.cov.pars=c(0.5,100),nugget=0.05, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML',trend= '1st' )
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(mlfit_trend_gau)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta0 beta1 beta2
## 2.8429 -0.0041 -0.0015
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 0.4671
## (estimated) cor. fct. parameter phi (range parameter) = 75.57
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.0654
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 130.8012
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-46.3" "6" "104.6" "117.9"
##
## non spatial model:
## log.L n.params AIC BIC
## "-72.77" "4" "153.5" "162.4"
##
## Call:
## likfit(geodata = lpm_geo_alb, trend = "1st", ini.cov.pars = c(0.5,
## 100), fix.nugget = FALSE, nugget = 0.05, cov.model = "gaussian",
## lik.method = "ML")
# suppose you only want to add trend in one direction (x or y). Here x is significant in linear trend check.
mlfit_trendx_gau=likfit(lpm_geo_alb,ini.cov.pars=c(0.5,100),nugget=0.05, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML',trend= ~lpm_geo_alb$coords[,1] )
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
# kriging with linear trend
kriged_grid_trend=krige.conv(lpm_geo_alb,locations=myGrid,krige=krige.control(obj.model=mlfit_trend_gau,trend.d='1st',trend.l='1st'))
## krige.conv: model with mean given by a 1st order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
# kriging with quadratic trend
mlfit_trend2_gau=likfit(lpm_geo_alb,ini.cov.pars=c(0.5,100),nugget=0.05, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML',trend= '2nd' )
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
# kriging with quadratic trend (seems to be a better fit)
kriged_grid_trend2=krige.conv(lpm_geo_alb,locations=myGrid,krige=krige.control(obj.model=mlfit_trend2_gau,trend.d='2nd',trend.l='2nd'))
## krige.conv: model with mean given by a 2nd order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
# have problem with negative predictions, set them to 0
kriged_grid_trend2$predict2<-ifelse(kriged_grid_trend2$predict<0,0,kriged_grid_trend2$predict)
# plot gridded predictions
image.plot(xs,ys,matrix(kriged_grid_trend2$predict2,res,res,byrow=FALSE),col=tim.colors(32), main="Universal Kriging by ML Quad Trend")
Displaying the kriging predictions on a leaflet map requires:
proj_albers<-"+proj=aea +lat_1=34.0 +lat_2=40.5 +lon_0=-120.0 +x_0=0 +y_0=-4000000 +ellps=GRS80 +datum=NAD83 +units=km"
myGrid_ll<- project(myGrid, proj_albers, inverse=TRUE)
preds = data.frame(myGrid_ll, pred.pm=exp(kriged_grid_trend2$predict2) %>% as.vector)
pred.pal = colorNumeric(c('darkgreen','gold1','brown'),
domain=preds$pred.pm)
leaflet(preds) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircles(lng=~x, lat=~y, color=~pred.pal(pred.pm),
opacity=.7, fillOpacity=.7, radius=1e3) %>% # points w/ 1KM radius
addLegend('bottomleft', pal=pred.pal, values=preds$pred.pm,
title="Pred. PM2.5", opacity=.5)
We can conduct universal kriging with covariates (sometimes called spatial regression), and it may include trend plus covariates. To illustrate we first simulate an elevation variable.
# Create of random elevation variable to use as a spatially varying covariate in universal kriging
pm_ca$elev<-runif(68,0,2000)
# need to add covariate to geodata obj
lpmelev_geo_alb<-as.geodata(pm_ca,coords.col=c(7,8), data.col=9, covar.col=10, covar.names="elev")
plot(lpmelev_geo_alb)
# check association
mod_elev<-lm(logpm25~elev,dat=pm_ca)
summary(mod_elev)
##
## Call:
## lm(formula = logpm25 ~ elev, data = pm_ca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.46496 -0.67533 -0.06067 0.38534 2.20571
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.015e+00 2.126e-01 14.181 <2e-16 ***
## elev -4.428e-05 1.875e-04 -0.236 0.814
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8839 on 66 degrees of freedom
## Multiple R-squared: 0.0008441, Adjusted R-squared: -0.01429
## F-statistic: 0.05576 on 1 and 66 DF, p-value: 0.8141
mlfit_elev_gau=likfit(lpmelev_geo_alb,ini.cov.pars=c(0.5,100),nugget=0.05, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML',trend= ~elev)
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(mlfit_elev_gau)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta0 beta1
## 3.0087 -0.0001
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 0.6924
## (estimated) cor. fct. parameter phi (range parameter) = 130.9
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.1005
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 226.6275
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-49.47" "5" "108.9" "120"
##
## non spatial model:
## log.L n.params AIC BIC
## "-87.08" "3" "180.2" "186.8"
##
## Call:
## likfit(geodata = lpmelev_geo_alb, trend = ~elev, ini.cov.pars = c(0.5,
## 100), fix.nugget = FALSE, nugget = 0.05, cov.model = "gaussian",
## lik.method = "ML")
KCelev<-krige.control(obj.m=mlfit_elev_gau,trend.d= ~elev,trend.l=~elev)
#universal_krige2<-krige.conv(lpmelev_geo_alb,locations=myGrid,krige=KCelev)
# Note we cannot do this because we don't have trend values (elevation) at all of the prediction points.
# universal krigging with trend and covariate predict at point locations
mlfit_elev2_gau=likfit(lpmelev_geo_alb,ini.cov.pars=c(0.5,100),nugget=0.05, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML',trend= trend.spatial("2nd", lpmelev_geo_alb, add=~elev))
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(mlfit_elev2_gau)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta0 beta1 beta2 beta3 beta4 beta5 beta6
## 3.5150 -0.0077 -0.0049 0.0000 0.0000 0.0000 -0.0001
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 0.2761
## (estimated) cor. fct. parameter phi (range parameter) = 36.88
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.0295
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 63.82743
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-38.42" "10" "96.84" "119"
##
## non spatial model:
## log.L n.params AIC BIC
## "-55.02" "8" "126" "143.8"
##
## Call:
## likfit(geodata = lpmelev_geo_alb, trend = trend.spatial("2nd",
## lpmelev_geo_alb, add = ~elev), ini.cov.pars = c(0.5, 100),
## fix.nugget = FALSE, nugget = 0.05, cov.model = "gaussian",
## lik.method = "ML")
This is the setup for creating a 70-30 traning testing split:
# Split data into test and training sets (70-30)
set.seed(123)
s<-sample(1:nrow(pm_ca),dim(pm_ca)[1]*.7,replace=FALSE)
temp<-1:length(pm_ca$logpm25)
temp[s]<-0
testset<-temp[temp!=0]
test<-pm_ca[testset,]
train<-pm_ca[s,]
Interpolation with inverse distance weighting (IDW) is a simple non-statistical interpolation (weighting) method.
# look at distances with two functions dist and rdist. rdist is in fields library
# distances within PM locations
# use projected coordinates here
pm_dists<-dist(newcoords)
# distances between PM locations and grid
pm_new_dists<-rdist(newcoords,myGrid)
#inverse distance weighting function
idw<-function(data,locs,newlocs,p){
dists<-rdist(newlocs,locs)
return(((dists^(-p))%*%data)/((dists^(-p))%*%rep(1,length(data))))
}
# testing different p's
idwPred2<-idw(pm_ca$logpm25,cbind(pm_ca$x,pm_ca$y),myGrid,p=1)
idwPred6<-idw(pm_ca$logpm25,cbind(pm_ca$x,pm_ca$y),myGrid,p=6)
idwPred12<-idw(pm_ca$logpm25,cbind(pm_ca$x,pm_ca$y),myGrid,p=12)
# plotting the results on our grid
image.plot(xs,ys,matrix(idwPred2,res,res),col=tim.colors(32))
points(pm_ca$x,pm_ca$y,pch=19,cex=0.3)
image.plot(xs,ys,matrix(idwPred6,res,res),col=tim.colors(32))
points(pm_ca$x,pm_ca$y,pch=19,cex=0.3)
image.plot(xs,ys,matrix(idwPred12,res,res),col=tim.colors(32))
points(pm_ca$x,pm_ca$y,pch=19,cex=0.3)
Generalized additive models (GAM) are powerful non-parametric representations of data (sometimes called functional data). We concentrate on non-parametric functions of our predictor variables.
co2<-read.csv("/Users/mf/Dropbox (University of Southern California)/Courses/PM569/Data/co2_mm_mlo.csv")
# CO2 data for time series illustration of basis functions
plot(co2$date_dec,co2$co2,type='l')
co2_2018<-co2[co2$year==2018,]
co2_1998<-co2[co2$year==1998,]
plot(co2_2018$co2)
# Simple polynomial basis
lm_co2=lm(co2~month+I(month^2)+I(month^3),data=co2_2018)
lm_co2$coefficients
## (Intercept) month I(month^2) I(month^3)
## 403.12161616 4.60834295 -0.84445943 0.04178969
# Fit polynomial result
month<-seq(1:12)
y <- 403.12161616+ 4.60834295*month -0.84445943*month^2 + 0.04178969*month^3
plot(co2_2018$co2, ylab="CO2, ppm",xlab="Month")
lines(y)
# Use polynomial result to predict for different year
poly_pred<-predict(lm_co2,newdat=co2_1998)
plot(co2_1998$co2,ylim=c(360,410))
lines(poly_pred,col='red')
# not a good fit because we didn't take longer term trend into account
# Using cubic regression spline bases with 4 knots
gam_co2<-gam(co2~s(month,bs="cr"),data=co2_2018)
plot(gam_co2)
# try fitting to all data and smoothing date (overall trends) and month (to get within year trends)
gam_co2_all<-gam(co2~s(date_dec,bs="cr",k=20)+s(month,bs="cc"),data=co2)
summary(gam_co2_all)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## co2 ~ s(date_dec, bs = "cr", k = 20) + s(month, bs = "cc")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 354.49606 0.01675 21158 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(date_dec) 18.587 18.98 147276 <2e-16 ***
## s(month) 7.231 8.00 1898 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 1 Deviance explained = 100%
## GCV = 0.21498 Scale est. = 0.20717 n = 738
# predict on data
pred_co2<-predict.gam(gam_co2_all,co2)
plot(pred_co2,type='l')
# Thin plate splines (2-D)
# Let gam() find number of knots
gam_pm<-gam(logpm25~s(x,y,bs="ts"),data=pm_ca)
plot(gam_pm, main="GAM default k")
summary(gam_pm)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## logpm25 ~ s(x, y, bs = "ts")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.97205 0.03935 75.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x,y) 22.31 29 14.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.863 Deviance explained = 90.9%
## GCV = 0.16025 Scale est. = 0.10532 n = 68
# Change number of knots so it is more smooth (larger k)
gam_pm2<-gam(logpm25~s(x,y,bs="ts",k=60),data=pm_ca)
plot(gam_pm2, main="GAM large k")
summary(gam_pm2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## logpm25 ~ s(x, y, bs = "ts", k = 60)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.97205 0.01101 270 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x,y) 57.25 59 105.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.989 Deviance explained = 99.8%
## GCV = 0.057487 Scale est. = 0.0082417 n = 68
# Use fitted gam object to predict smooth in x,y at grid locations
pred_gam<-predict.gam(gam_pm2,myGrid,se.fit=TRUE)
# Plot predictions
image.plot(xs,ys,matrix(pred_gam$fit,res,res),col=tim.colors(32))
points(pm_ca$x,pm_ca$y,pch=19,cex=0.3)
# Standard errors of predictions
image.plot(xs,ys,matrix(pred_gam$se.fit,res,res),col=tim.colors(32))
# Can also add linear and/or smooth covariates
gam_pm3<-gam(logpm25~s(elev)+s(x,y,bs="ts",k=50),data=pm_ca)
summary(gam_pm3)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## logpm25 ~ s(elev) + s(x, y, bs = "ts", k = 50)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.97205 0.02718 109.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(elev) 1.00 1 1.676 0.206
## s(x,y) 39.22 49 19.937 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.935 Deviance explained = 97.4%
## GCV = 0.12752 Scale est. = 0.050225 n = 68
#pred_gam_new3<-predict.gam(gam_pm3,myGrid,se.fit=TRUE)
#image.plot(xs,ys,matrix(pred_gam3$fit,res,res),col=tim.colors(32))
# Note we also have the issue here where we can't predict where don't have elevation.
# make nice map of gam predictions
preds = data.frame(myGrid_ll, pred.pm=exp(pred_gam$fit) %>% as.vector)
pred.pal = colorNumeric(c('darkgreen','gold1','brown'),
domain=preds$pred.pm)
leaflet(preds) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircles(lng=~x, lat=~y, color=~pred.pal(pred.pm),
opacity=.7, fillOpacity=.7, radius=1e3) %>% # points w/ 1KM radius
addLegend('bottomleft', pal=pred.pal, values=preds$pred.pm,
title="GAM Pred. PM2.5", opacity=.5)